1. Download Housing Dataset

In [16]:
import requests

# Dataset URL
url = 'https://raw.githubusercontent.com/ageron/handson-ml2/refs/heads/master/datasets/housing/housing.csv'

# Local filename to save the CSV
filename = 'housing.csv'

# Fetch the CSV. An explicit timeout (seconds) keeps this cell from hanging
# indefinitely if the server accepts the connection but stops responding.
response = requests.get(url, timeout=30)

if response.status_code == 200:
    # Write the raw response bytes so the file on disk is an exact copy of what
    # the server sent: no guessed text decoding and no platform-specific
    # newline translation.
    with open(filename, 'wb') as f:
        f.write(response.content)
    print(f"CSV file downloaded and saved as {filename}")
else:
    # Nothing is written on a non-200 response; later cells that read
    # `filename` will then see whatever (if anything) was there before.
    print(f"Failed to download file. Status code: {response.status_code}")
CSV file downloaded and saved as housing.csv

2. Basic Pre-Processing

In [17]:
import pandas as pd

# Open the CSV file and remove rows containing NaN.
# dropna() returns a new DataFrame instead of modifying the original, so its
# result must be kept; chaining it onto the read also makes this cell
# idempotent when re-run.
housing_dataset = pd.read_csv('housing.csv').dropna()

# Map Ocean Proximity
# NOTE(review): this encoding is defined but not applied (the .map line below
# is commented out). Before enabling it, confirm the mapping covers every
# value in 'ocean_proximity' -- Series.map yields NaN for unmapped values.
mapping = {'NEAR BAY': 0, 'NEAR OCEAN': 1, 'INLAND': 2}
#housing_dataset['ocean_proximity_encoded'] = housing_dataset['ocean_proximity'].map(mapping)
In [18]:
import seaborn as sns
import matplotlib.pyplot as plt

# Columns to compare against each other in the scatter-plot matrix
numeric_columns = [
    'longitude',
    'latitude',
    'housing_median_age',
    'total_rooms',
    'total_bedrooms',
    'population',
    'households',
    'median_income',
    'median_house_value',
]

# Marker styling: alpha sets transparency, s sets point size
scatter_style = dict(alpha=0.5, s=20)

# Pairwise relationships between the columns above, colored by ocean_proximity
sns.pairplot(
    data=housing_dataset,
    vars=numeric_columns,
    hue='ocean_proximity',
    plot_kws=scatter_style,
)

# y=1.02 positions the title just above the top edge of the figure
plt.suptitle('Attribute Relationships Colored by Ocean Proximity', y=1.02)
plt.show()
No description has been provided for this image